/*==========================================================*
Belonging to paper: 	Too much of a good thing? Estimating the trade-off between voter turnout and the representativeness of election results when studying turnout shocks.
Authors: 				Harm Rienks
Date:					August 2024

The materials here will replicate the main results of the paper "Too much of a good thing? Estimating the trade-off between voter turnout and the representativeness of election results when studying turnout shocks.", being Figure 2 and Tables 1 and 3. 
Note that in the paper we also use data from a LISS panel survey called "Wiv Referendum 2018: Third measurement". This data is not required for running this do-file. The agency that ran this survey prefers it when researchers directly download this (opensource) data from their website:  https://www.dataarchive.lissdata.nl/study-units/view/815 (doi: 10.17026/dans-xcr-n8az)

To run the file,  the following Stata packages will need to be installed
ssc install ivreg2, replace
ssc install ranktest, replace
ssc install outreg2, replace	

--> The replicator needs to adjust the working directory (the `cd` command under "*Define directory" below) to run the file. 
*===========================================================*/

* Close any log that is still open (no error if none is), then clear Stata's memory
capture log close
clear all

*adopath + "X:\My Desktop\Stata users updates"
set matsize 8000

* Working directory: replicators must change this path to the folder holding the dataset
cd "\\WURNET.NL\Homes\rienk005\AppData\FolderRedirection\Desktop\submission referendum paper to elect studies\Publishing dataset"

* Start the log file; an existing log with the same name is replaced
log using "Too much of a good thing log.smcl", replace

* Load the dataset from the working directory
use "Data paper too much of.dta"

* ----------------------------------------------------------------------
* Exclude Dutch Caribbean islands from analysis
* ----------------------------------------------------------------------

drop if inlist(gemeente, "Saba", "Bonaire", "Sint Eustatius")

* ----------------------------------------------------------------------
* Create variables
* ----------------------------------------------------------------------

* Copies of the raw referendum counts under English names
* (eligible_voters_wiv is used as the analytic weight in the Section 5 regressions)
generate eligible_voters_wiv = wiv_ref_kiesgerechtigden
generate valid_votes_wiv     = wiv_ref_geldigestemmen

* Creating variables as percentage of eligible voters (pev=percentage of eligible voters)
generate 		wiv_ref_turn_pev = 100 * (wiv_ref_opkomst / wiv_ref_kiesgerechtigden)
label variable	wiv_ref_turn_pev 				"Referendum turnout (%)"
label variable	wiv_ref_kiesgerechtigden 		"Eligible voters ISSA"
* NOTE: despite the _mln suffix, this variable is scaled per 10 000 eligible voters (see its label)
generate		wiv_ref_eligible_voters_mln = wiv_ref_kiesgerechtigden / 10000
label variable	wiv_ref_eligible_voters_mln 	"Eligible voters ISSA (in 10 000s)"

* Creating variables as percentage of total votes cast (ptv=percentage of total votes cast)
* The denominator is yes + no votes only
generate 		wiv_ref_yes_ptv = 100 * (wiv_ref_voor / (wiv_ref_voor + wiv_ref_tegen))
label variable	wiv_ref_yes_ptv  "Yes votes (% total yes/no votes cast)"
generate 		wiv_ref_no_ptv  = 100 * (wiv_ref_tegen / (wiv_ref_voor + wiv_ref_tegen))
label variable	wiv_ref_no_ptv   "No votes (% total yes/no votes cast)"

* Voting locations per 1000 eligible voters
generate		vot_loc_cap			= aantal_stemb_verk_2018 / (wiv_ref_kiesgerechtigden / 1000)
label variable	vot_loc_cap			"Voting locations per 1000 eligible voters"

* Men as a percentage of men + women
generate 		gender = 100 * (aantalmannen) / (aantalmannen + aantalvrouwen)
label variable	gender 				"Male population (%)"

* Age shares: numerator is the sum of the older age brackets, denominator the sum of all brackets
generate 		age_above45 = 100 * (age45_65 + age65_80 + age80andup) / ///
					(jongerdan5jaar + age5_10 + age10_15 + age15_20 + age20_25 + age25_45 + age45_65 + age65_80 + age80andup)
label variable	age_above45			"Aged 45 and over (%)"

generate 		age_above65 = 100 * (age65_80 + age80andup) / ///
					(jongerdan5jaar + age5_10 + age10_15 + age15_20 + age20_25 + age25_45 + age45_65 + age65_80 + age80andup)
label variable	age_above65 		"Aged 65 and over (%)"

* Generate variable that assigns weight to a municipality based on the number of votes cast in that municipality.
* 6709503 is the total number of (valid) votes cast in the referendum (hard-coded).
* The weight is a fraction: it is not multiplied by 100.
generate		weight_mun_outcome_wiv = wiv_ref_geldigestemmen / 6709503
label variable	weight_mun_outcome_wiv "number of votes cast in municipality as perc of all valid votes ISSA referendum"

* Municipal yes/no percentages multiplied by the municipality's weight
generate		wiv_ref_yes_weighted = wiv_ref_yes_ptv * weight_mun_outcome_wiv
label variable	wiv_ref_yes_weighted "number of yes votes per municipality as % of all valid votes ISSA referendum"

generate		wiv_ref_no_weighted = wiv_ref_no_ptv * weight_mun_outcome_wiv
label variable	wiv_ref_no_weighted "number of no votes per municipality as % of all valid votes ISSA referendum"

* Aggregating parties in favor of ISSA (as a percentage of geldigestemmen)
generate 		parties_pro_wiv_ptv = 100 * ((vvd + pvvpartijvoordevrijheid + cda + democraten66d66 + partijvandearbeidpvda + christenunie + plus + staatkundiggereformeerdepartij) / geldigestemmen)
label variable	parties_pro_wiv_ptv "Share of parties in favor of ISSA (%)"
* Note: D66 voted against the wiv when the law was voted on in parliament in 2017. PVV has indicated that it would support the outcome of the referendum.

* Rename variables
rename 			gemiddeldepersoonlijkinkomenp average_income_per_person
label variable	average_income_per_person "Average income per person"
rename 			beroepsbevolkingwerkloosheidspe unemployment_perc
label variable	unemployment_perc "Unemployment (%)"

*****************************************************************************************************************************************
*				Section 3: Case description																								*
*****************************************************************************************************************************************

* Figure 2 (exported as Figure2redone.wmf)
* Scatter of the Yes-vote share against referendum turnout, drawn separately for
* municipalities with mun_also_grv_2018 == 0 (navy) and mun_also_grv_2018 == 1 (light blue),
* plus a linear fit over all municipalities.
scatter wiv_ref_yes_ptv wiv_ref_turn_pev if mun_also_grv_2018 == 0, ///
		mcolor(navy) text(68 32 "No concurrent elections") ///
		legend(off) ytitle(Yes votes (% of votes cast)) ///
	|| scatter wiv_ref_yes_ptv wiv_ref_turn_pev if mun_also_grv_2018 == 1, ///
		mcolor(ltblue) text(68 60 "Concurrent elections") ///
	|| (lfit wiv_ref_yes_ptv wiv_ref_turn_pev)
graph export Figure2redone.wmf, replace


*****************************************************************************************************************************************
*				Section 4: Model, identification and data																				*
*****************************************************************************************************************************************

* Table 1: descriptive statistics (N, mean, sd, min, max) for the listed variables,
* written by outreg2 in Word format with variable labels
outreg2 using "Table 1 - Descriptive statistics", ///
		replace sum(detail) eqkeep(N mean sd min max) ///
		keep(wiv_ref_turn_pev wiv_ref_yes_ptv mun_also_grv_2018 ///
			wiv_ref_eligible_voters_mln gender age_above45 unemployment_perc ///
			average_income_per_person parties_pro_wiv_ptv vot_loc_cap mun_ref) ///
		dec(2) stats(coef se) aster word label


*****************************************************************************************************************************************
*				Section 5: Results																										*
*****************************************************************************************************************************************

* Table 3, column 1 (first-stage without control variables)
* All regressions below are weighted by the number of eligible voters and use robust standard errors.
regress wiv_ref_turn_pev mun_also_grv_2018 [aweight=eligible_voters_wiv], robust
outreg2 using "Table 3", 		///
		keep(mun_also_grv_2018)         ///
		dec(2) stats(coef se) aster replace  word label 	///
		title(Table 3 - effect of consecutive elections on referendum turnout and on Yes votes)								///
		ctitle(Turnout no controls)		///
		addnote(OLS estimates)

* Table 3, column 2 (second-stage without control variables)
* Turnout is instrumented with mun_also_grv_2018; e(widstat) from ivreg2 is reported as the first-stage F statistic.
ivreg2 wiv_ref_yes_ptv (wiv_ref_turn_pev = mun_also_grv_2018) [aweight=eligible_voters_wiv], robust
outreg2 using "Table 3 part 2", 	///
		dec(2) stats(coef se) aster replace  word label		///
		title(Table 3 - effect of referendum turnout on Yes votes)	///
		addnote(IV estimates)				///
		addstat(Kleibergen-Paap Wald rk F, `e(widstat)')	///
		ctitle(Yes votes (%))	

* Table 3, column 3 (first-stage with control variables)
regress wiv_ref_turn_pev mun_also_grv_2018 parties_pro_wiv_ptv gender age_above45 ///
		average_income_per_person unemployment_perc wiv_ref_eligible_voters_mln ///
		vot_loc_cap mun_ref [aweight=eligible_voters_wiv], robust

* Predicted turnout under three scenarios, all based on the column 3 coefficients:
*   turnout_predicted_partSm      - linear prediction at the observed value of mun_also_grv_2018
*   turnout_no_conc_elec_Nr       - prediction with the mun_also_grv_2018 term left out (dummy set to 0 everywhere)
*   turnout_all_conc_elec_partNm  - prediction with the mun_also_grv_2018 coefficient added for every municipality (dummy set to 1 everywhere)
predict turnout_predicted_partSm, xb
predictnl turnout_no_conc_elec_Nr = _b[_cons] ///
		+ _b[parties_pro_wiv_ptv]*parties_pro_wiv_ptv ///
		+ _b[gender]*gender ///
		+ _b[age_above45]*age_above45 ///
		+ _b[average_income_per_person]*average_income_per_person ///
		+ _b[unemployment_perc]*unemployment_perc ///
		+ _b[wiv_ref_eligible_voters_mln]*wiv_ref_eligible_voters_mln ///
		+ _b[vot_loc_cap]*vot_loc_cap ///
		+ _b[mun_ref]*mun_ref
predictnl turnout_all_conc_elec_partNm = (_b[_cons] ///
		+ _b[parties_pro_wiv_ptv]*parties_pro_wiv_ptv ///
		+ _b[gender]*gender ///
		+ _b[age_above45]*age_above45 ///
		+ _b[average_income_per_person]*average_income_per_person ///
		+ _b[unemployment_perc]*unemployment_perc ///
		+ _b[wiv_ref_eligible_voters_mln]*wiv_ref_eligible_voters_mln ///
		+ _b[vot_loc_cap]*vot_loc_cap ///
		+ _b[mun_ref]*mun_ref ///
		+ _b[mun_also_grv_2018]*1)
outreg2 using "Table 3", 		///
		keep(mun_also_grv_2018 parties_pro_wiv_ptv gender age_above45 average_income_per_person unemployment_perc wiv_ref_eligible_voters_mln vot_loc_cap mun_ref)         ///
		dec(2) stats(coef se) aster append  word label 	///
		title(Table 3 - effect of consecutive elections on referendum turnout and on Yes votes)									///
		ctitle(Turnout with controls)		///
		addnote(OLS estimates)

mean turnout_predicted_partSm turnout_no_conc_elec_Nr turnout_all_conc_elec_partNm [aweight=eligible_voters_wiv]
* The three weighted means above are, in order:
*   (1) predicted turnout with concurrent elections only where they actually took place (used to calculate Sm),
*   (2) predicted turnout if no municipality had concurrent elections (used to calculate Nr),
*   (3) predicted turnout if all municipalities had concurrent elections (used to calculate Nm).

* Table 3, column 4 (second-stage with control variables)
ivreg2 wiv_ref_yes_ptv parties_pro_wiv_ptv gender age_above45 average_income_per_person ///
		unemployment_perc wiv_ref_eligible_voters_mln vot_loc_cap mun_ref ///
		(wiv_ref_turn_pev = mun_also_grv_2018) [aweight=eligible_voters_wiv], robust
predict yes_predicted, xb
outreg2 using "Table 3 part 2", 	///
		keep(wiv_ref_turn_pev 	parties_pro_wiv_ptv gender age_above45 average_income_per_person unemployment_perc wiv_ref_eligible_voters_mln vot_loc_cap mun_ref mun_also_grv_2018) ///
		dec(2) stats(coef se) aster append word label		///
		title(Table 3 - effect of referendum turnout on Yes votes)	///
		addnote(IV estimates)				///
		addstat(Kleibergen-Paap Wald rk F, `e(widstat)')	///
		ctitle(Yes votes (%))	

* Predicted Yes share without concurrent elections: for municipalities with
* mun_also_grv_2018 == 1 the predicted Yes share is lowered by 23.48749 * 0.2706916.
* NOTE(review): both constants are hard-coded. Presumably 23.48749 is the column 3
* coefficient on mun_also_grv_2018 and 0.2706916 the column 4 coefficient on
* wiv_ref_turn_pev - confirm against the log, and update by hand if the data or
* the specification change.
gen yes_no_conc_elec_VrA 						= yes_predicted
replace yes_no_conc_elec_VrA					= yes_predicted - (23.48749 * 0.2706916) if mun_also_grv_2018 == 1
mean yes_no_conc_elec_VrA [aweight=eligible_voters_wiv] // We now know VrA

* Close log file
log close

/*
